{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 基础rag"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 构建提示词\n",
    "def question_answering(context, query):\n",
    "    prompt = \"使用{context}中的信息来回答{query}中的用户问题。如果所提供的\\\n",
    "    上下文中没有相关信息，就尝试自己回答，但是要告诉用户你的回答没有任何相关的上下文依据。请简洁明了，回答的内容不得超过80个词。\"\n",
    "    response = get_completion(instruction, prompt, model=\"gpt-3.5-turbo\")\n",
    "    answer = response.choices[0].message[\"content\"]\n",
    "    return answer\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 先进的rag"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 构建提示词\n",
    "question = \"对于分类问题最好的机器学习算法是什么?\"\n",
    "retrieved_docs = [\"文档1: SVM算法被广泛应用于 ...\", \"文档2:随机森林算法是一种鲁棒性很高的...\"]\n",
    "template = \"请基于这些文档{docs}，帮助回答以下问题{question}\"\n",
    "full_prompt = template.format(question=question, docs=\" \".join(retrieved_docs))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 大模型主导的rag"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 头文件\n",
    "from langchain.agents import Tool, AgentExecutor, LLMSingleActionAgent, AgentOutputParser\n",
    "from langchain.prompts import StringPromptTemplate\n",
    "from langchain.chat_models import AzureChatOpenAI\n",
    "from langchain import LLMChain\n",
    "from langchain.utilities import BingSearchAPIWrapper\n",
    "from typing import List, Union\n",
    "from langchain.schema import AgentAction, AgentFinish\n",
    "import re\n",
    "import os\n",
    "\n",
    "# 工具\n",
    "os.environ[\"BING_SEARCH_URL\"] = \"xxx\"\n",
    "os.environ[\"BING_SUBSCRIPTION_KEY\"] = \"xxx\"\n",
    "search = BingSearchAPIWrapper()\n",
    "tools = [ Tool( name = \"Search\", func=search.run,\n",
    "        description=\"useful for when you need to answer questions about current events\"\n",
    "            )\n",
    "]\n",
    "\n",
    "#提示词模板\n",
    "class CustomPromptTemplate(StringPromptTemplate):\n",
    "    template: str\n",
    "    tools: List[Tool]\n",
    "    def format(self, **kwargs) -> str:\n",
    "        intermediate_steps = kwargs.pop(\"intermediate_steps\")\n",
    "        thoughts = \"\"\n",
    "        for action, observation in intermediate_steps:\n",
    "            thoughts += action.log\n",
    "            thoughts += f\"\\nObservation: {observation}\\nThought: \"\n",
    "        kwargs[\"agent_scratchpad\"] = thoughts\n",
    "        kwargs[\"tools\"] = \"\\n\".join([f\"{tool.name}: {tool.description}\" for tool in self.tools])\n",
    "        kwargs[\"tool_names\"] = \", \".join([tool.name for tool in self.tools])\n",
    "        return self.template.format(**kwargs)\n",
    "\n",
    "# 大语言模型\n",
    "os.environ[\"OPENAI_API_TYPE\"] = \"azure\"\n",
    "os.environ[\"OPENAI_API_BASE\"] = \"xxx\"\n",
    "os.environ[\"OPENAI_API_KEY\"] = \"xxx\"\n",
    "llm = AzureChatOpenAI(deployment_name=\"gpt-35-turbo\", openai_api_version=\"2023-03-15-preview\")\n",
    "\n",
    "# 输出解析器\n",
    "class CustomOutputParser(AgentOutputParser):\n",
    "        def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:\n",
    "            if \"Final Answer:\" in llm_output:\n",
    "                return AgentFinish(\n",
    "                    return_values={\"output\": llm_output.split(\"Final Answer:\")[-1].strip()},\n",
    "                    log=llm_output,\n",
    "                )\n",
    "            regex = r\"Action\\s*\\d*\\s*:(.*?)\\nAction\\s*\\d*\\s*Input\\s*\\d*\\s*:[\\s]*(.*)\"\n",
    "            match = re.search(regex, llm_output, re.DOTALL)\n",
    "            if not match:\n",
    "                raise ValueError(f\"Could not parse LLM output: `{llm_output}`\")\n",
    "            action = match.group(1).strip()\n",
    "            action_input = match.group(2)\n",
    "            return AgentAction(tool=action, tool_input=action_input.strip(\" \").strip('\"'), log=llm_output)\n",
    "output_parser = CustomOutputParser()\n",
    "\n",
    "# Agent\n",
    "llm_chain = LLMChain(llm=llm, prompt=prompt)\n",
    "tool_names = [tool.name for tool in tools]\n",
    "agent = LLMSingleActionAgent(\n",
    "    llm_chain=llm_chain, \n",
    "    output_parser=output_parser,\n",
    "    stop=[\"\\nObservation:\"], \n",
    "    allowed_tools=tool_names\n",
    ")\n",
    "\n",
    "# 执行\n",
    "agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True)\n",
    "agent_executor.run(\"generate a short blog post to review the plot of the movie Avatar 2. It should be a positive review.\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "chatpdf",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
